require(magrittr)
require(knitr)
require(dplyr)
require(tidyr)
require(tibble)
require(ggplot2)
require(Seurat)
require(cowplot)
require(kableExtra)
require(readr)
require(readxl)
require(pochi)
parent.directory <- "intermediate/"
parent.directory.mtx <- "filtered/"

A. GC and ASC extraction

A.1. Getting cell identifiers from large Seurat object and re-running analysis

We take our GC and antibody-secreting cell (ASC) identifiers from our orignal Seurat dataset. We then get the information from the original 10X files, only taking the cell identifiers from the GC and ASCs.

load(file = paste0(parent.directory, "sct_cluster_metadata.RData"))
GCPC_metadata %>%
  filter(parent_cluster != "B PLCG2") -> GCPC_metadata
list_identifiers <- c("TC124.1", "TC124.2", "TC124.3", "TC125", "TC126")
cell_barcodes_list <- lapply(1:length(list_identifiers), function(i){
  GCPC_metadata %>%
    filter(orig_ident == list_identifiers[i]) %>%
    select(identifier) %>%
    mutate(raw_barcode = sub(paste0(list_identifiers[i], "_"), "", identifier)) %>%
    pull(raw_barcode) -> raw_barcodes
  return(raw_barcodes)
})
TC124.1.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit1"), strip.suffix = TRUE)
TC124.2.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit7"), strip.suffix = TRUE)
TC124.3.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit8"), strip.suffix = TRUE)
TC125.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit2"), strip.suffix = TRUE)
TC126.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit3"), strip.suffix = TRUE)

B.names <- c("TC124.1", "TC124.2", "TC124.3", "TC125", "TC126")
B.list <- list(TC124.1.data, TC124.2.data, TC124.3.data, TC125.data, TC126.data)
rm(list = ls()[grepl(".data", ls(), fixed = TRUE)])

B.list <- lapply(1:length(B.list), function(i){
  B.list[[i]] <- B.list[[i]][,cell_barcodes_list[[i]]]
  colnames(B.list[[i]]) <- paste0(B.names[i], "_", colnames(B.list[[i]]))
  return(CreateSeuratObject(B.list[[i]], project = B.names[[i]], min.cells = 3, min.features = 200))
})

A.2. Re-run SCTransform normalization and integration

Now that we have our new Seurat objects, we will add metadata and then will re-perform our SCTransform normalization and the Seurat v3 integration algorithm.

read_excel("genesets/dissociation_genes.xlsx") %>% select('Human ID') %>% na.omit %>% pull("Human ID") -> dissociation.genes

for(i in 1:length(B.list)){
  temp.dissoc.genes <- dissociation.genes[dissociation.genes %in% rownames(B.list[[i]])]
  B.list[[i]][["percent.dissoc"]] <- PercentageFeatureSet(B.list[[i]], features = temp.dissoc.genes)
  B.list[[i]][["percent.mt"]] <- PercentageFeatureSet(B.list[[i]], pattern = "^MT-")
  B.list[[i]][["donor"]] <- substring(B.list[[i]]$orig.ident, 1, 5)
}
#This removes the unused data thus far
rm(i, temp.dissoc.genes)

B.list <- list(merge(x = B.list[[1]], y = B.list[2:3]), B.list[[4]], B.list[[5]])
B.list <- lapply(1:length(B.list), function(i){
  return(SCTransform(B.list[[i]], verbose = TRUE))
})
B.features <- SelectIntegrationFeatures(object.list = B.list, nfeatures = 3000)
B.list <- PrepSCTIntegration(object.list = B.list, anchor.features = B.features, verbose = TRUE)
B.anchors <- FindIntegrationAnchors(object.list = B.list, normalization.method = "SCT", anchor.features = B.features, dims = 1:40, verbose = TRUE)
B.gcpc <- IntegrateData(anchorset = B.anchors, normalization.method = "SCT", dims = 1:40, verbose = TRUE) 
save(B.gcpc, GCPC_metadata, file = paste0(parent.directory, "sct_gcpc_integrated.RData"))

A.3. Re-run PCA, UMAP, and Louvain clustering on smaller dataset.

We perform PCA and UMAP and Louvain clustering just as before. We will also perform Louvain clustering at a single resolution, followed by scoring of specific gene sets just as in the original Seurat dataset.

load(file = paste0(parent.directory, "sct_gcpc_integrated.RData"))
B.gcpc[["SCT"]] <- NULL
IG.genes <- grep("^IGL|^IGK|^IGHV", B.gcpc@assays$integrated@var.features, value = TRUE)
B.gcpc@assays$integrated@var.features <- setdiff(B.gcpc@assays$integrated@var.features, c(IG.genes))
B.gcpc <- RunPCA(B.gcpc, npcs = 50, verbose = TRUE)
dims.to.use <- 1:30
B.gcpc <- BuildAnnoyUMAP(B.gcpc,
                         reduction = "pca",
                         metric = "euclidean",
                         dims = dims.to.use,
                         reduction.name = "pca_UMAP",
                         reduction_key  = "pcaumap_",
                         save.graphs  = TRUE)
DefaultAssay(B.gcpc) <- "RNA"
B.gcpc <- NormalizeData(B.gcpc)

B.gcpc <- FindClusters(B.gcpc, resolution = 0.5, graph.name = "annoy_nn_snn")
B.gcpc$previous_idents <- plyr::mapvalues(rownames(B.gcpc@meta.data), from = GCPC_metadata$identifier, to = as.character(GCPC_metadata$parent_cluster))

save(B.gcpc, file = paste0(parent.directory,"sct_seurat_gcpc_clustered.RData"))

A.4. Finding marker genes for the new clusters.

We use the Seurat FindAllMarkers function and SoupX to find two sets of marker genes for our new clusters.

GCPC_RNA_markers <- FindAllMarkers(B.gcpc, assay = "RNA", logfc.threshold = 0.3, test.use = "LR", verbose = TRUE, only.pos = FALSE, latent.vars = "donor")
GCPC_SoupX_markers <- SoupX::quickMarkers(B.gcpc[["RNA"]]@data, B.gcpc$seurat_clusters, N = 20)
save(GCPC_RNA_markers, GCPC_SoupX_markers, file = paste0(parent.directory, "sct_seurat_gcpc_markers.RData"))

B. Data visualization

B.1 Cluster visualization full

We can now visualize the new clusters and the previous clusters.

load(paste0(parent.directory,"sct_seurat_gcpc_clustered.RData"))
load(paste0(parent.directory,"sct_seurat_gcpc_markers.RData"))
DimPlot(B.gcpc, reduction = "pca_UMAP", label = TRUE, label.size = 5, label.box = TRUE, pt.size = 0.8, group.by = "previous_idents")
DimPlot(B.gcpc, reduction = "pca_UMAP", label = TRUE, label.size = 5, label.box = TRUE, pt.size = 0.8)

B.2 SoupX markers

We will visualize the SoupX markers here.

cluster_list <- sort(unique(B.gcpc$seurat_clusters))
n_clusters <- length(cluster_list)
mrks <- SoupX::quickMarkers(B.gcpc[["RNA"]]@data, B.gcpc$seurat_clusters, N = 20)
## as(<dgCMatrix>, "dgTMatrix") is deprecated since Matrix 1.5-0; do as(., "TsparseMatrix") instead
mrks_list <- lapply(cluster_list, function(i){
 mrks %>%
   dplyr::filter(cluster == i) %>%
   dplyr::pull(gene)
})
dotplot_list <- lapply(1:n_clusters, function(i) {
  cluster_name <- cluster_list[i]
  temp.slot <- paste0("Cluster ", cluster_name)
  if(length(mrks_list[[{{i}}]]) == 0){return(NULL)}
  src <- c("#### {{temp.slot}} {.unnumbered}",
           "```{r soupdotplot-{{i}}, fig.width = 12, fig.height = 6}",
           "DotPlot(B.gcpc, features = mrks_list[[{{i}}]], assay = \"RNA\")+RotatedAxis()",
           "```",
           "")
  knit_expand(text = src)
})
out <- knit_child(text = unlist(dotplot_list), options = list(echo = FALSE, cache = FALSE))

Cluster 0

Cluster 1

Cluster 2

Cluster 3

Cluster 4

Cluster 5

Cluster 6

Cluster 7

Cluster 8

Cluster 9

Cluster 10

Cluster 11

B.3 Seurat markers

DefaultAssay(B.gcpc) <- "RNA"
cluster_list <- levels(GCPC_RNA_markers$cluster)
n_clusters <- length(cluster_list)
gene_list <- lapply(cluster_list, function(i){
  GCPC_RNA_markers %>%
    dplyr::filter(p_val_adj < 0.01) %>%
    dplyr::filter(cluster == i) %>%
    dplyr::top_n(20, wt = avg_log2FC) %>%
    dplyr::pull(gene)
})
dotplot_list <- lapply(1:n_clusters, function(i) {
  cluster_name <- cluster_list[i]
  temp.slot <- paste0("Cluster ", cluster_name)
  if(length(gene_list[[i]]) == 0 ){
    return(NULL)
  }
  src <- c("#### {{temp.slot}} {.unnumbered}",
           "```{r dotplot-{{i}}, fig.width = 12, fig.height = 6}",
           "DotPlot(B.gcpc, features = gene_list[[{{i}}]])+RotatedAxis()+scale_color_viridis_c(direction = -1, option = \"A\")",
           "```",
           "")
  knit_expand(text = src)
})
out <- knit_child(text = unlist(dotplot_list), options = list(echo = FALSE, cache = FALSE))

Cluster 0

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Cluster 1

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Cluster 2

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Cluster 3

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Cluster 4

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Cluster 5

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Cluster 6

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Cluster 7

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Cluster 8

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Cluster 9

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Cluster 10

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

Cluster 11

## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

B.4 Exploring marker genes for each cluster

We include tables of the marker genes found in each cluster.

cluster_vector <- levels(B.gcpc@active.ident)
n_clusters <- length(cluster_vector)

markers.list <- lapply(1:n_clusters, function(i) {
  GCPC_RNA_markers %>%
    dplyr::filter(cluster == cluster_vector[i] & p_val_adj < 0.01) %>%
    dplyr::arrange(-avg_log2FC) %>%
    dplyr::select(cluster, gene, avg_log2FC, p_val, p_val_adj) %>%
    dplyr::mutate_if(is.numeric, format, digits = 3)
})

names(markers.list) <- paste("Cluster", cluster_vector)
markers.list <- c(markers.list, "ALL" = list(GCPC_RNA_markers %>%  dplyr::arrange(-avg_log2FC) %>% select(cluster, gene, avg_log2FC, p_val, p_val_adj) %>% mutate_if(is.numeric, format, digits = 3)))

src_list <- lapply(1:(n_clusters+1), function(i) {
  if(i == (n_clusters+1)){
    label <- "ALL"
  } else {
    label <- cluster_vector[i]
  }
  src <- c("#### {{label}} {.unnumbered} ",
           "<div style = \"width:65%; height:auto; align:left\">",
           "```{r marker-cluster-{{label}}, fig.width=3}",
           "DT::datatable(markers.list[[{{i}}]], rownames = FALSE)",
           "```",
           "</div>",
           "")
  knit_expand(text = src)
})
out <- knit_child(text = unlist(src_list), options = list(echo = FALSE, cache = FALSE))

0

1

2

3

4

5

6

7

8

9

10

11

ALL

## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

B.5 Cluster renaming

The cluster identities remain largely the same as before.

#load(file = paste0(parent.directory, "sct_seurat_gcpc_clustered.RData"))
cluster_levels <- paste0("C",c(1,8,5,11,7,6,4,9,2,3,10,0))
cluster_names <- c("GC 1",
                   "GC 2",
                   "GC 3",
                   "GC IgA",
                   "LZ",
                   "DZ 1",
                   "DZ 2",
                   "DZ 3", 
                   "DZ 4",
                   "GC LMO2",
                   "ASC IgM",
                   "ASC IgG")
B.gcpc$new_clusters_nums <- factor(paste0("C", as.character(B.gcpc$annoy_nn_snn_res.0.5)),
                                   levels = cluster_levels)
B.gcpc$cluster_names <- plyr::mapvalues(B.gcpc$new_clusters_nums,
                                        from = cluster_levels,
                                        to = cluster_names)
Idents(B.gcpc) <- "cluster_names"
DimPlot(B.gcpc, label = TRUE, label.box = TRUE, repel = TRUE, cols = c("dodgerblue", "forestgreen", RColorBrewer::brewer.pal(11, "Set3")))+NoLegend()
save(B.gcpc, file = paste0(parent.directory, "sct_seurat_gcpc_named_clusters.RData"))
load(file = paste0(parent.directory, "sct_seurat_gcpc_named_clusters.RData"))
DimPlot(B.gcpc, label = TRUE, label.box = TRUE, repel = TRUE, cols = c("dodgerblue", "forestgreen", RColorBrewer::brewer.pal(11, "Set3")))+NoLegend()

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] pochi_0.1.0        readxl_1.4.1       readr_2.1.3        kableExtra_1.3.4  
##  [5] cowplot_1.1.1      sp_1.5-0           SeuratObject_4.1.2 Seurat_4.2.0      
##  [9] ggplot2_3.3.6      tibble_3.1.8       tidyr_1.2.1        dplyr_1.0.10      
## [13] knitr_1.40         magrittr_2.0.3    
## 
## loaded via a namespace (and not attached):
##   [1] SoupX_1.6.1           Rtsne_0.16            colorspace_2.0-3     
##   [4] deldir_1.0-6          ellipsis_0.3.2        ggridges_0.5.4       
##   [7] rstudioapi_0.14       spatstat.data_2.2-0   farver_2.1.1         
##  [10] leiden_0.4.3          listenv_0.8.0         DT_0.25              
##  [13] ggrepel_0.9.1         fansi_1.0.3           xml2_1.3.3           
##  [16] codetools_0.2-18      splines_4.1.1         cachem_1.0.6         
##  [19] polyclip_1.10-0       jsonlite_1.8.2        ica_1.0-3            
##  [22] cluster_2.1.4         png_0.1-7             rgeos_0.5-9          
##  [25] uwot_0.1.14           shiny_1.7.2           sctransform_0.3.5    
##  [28] spatstat.sparse_2.1-1 compiler_4.1.1        httr_1.4.4           
##  [31] assertthat_0.2.1      Matrix_1.5-1          fastmap_1.1.0        
##  [34] lazyeval_0.2.2        cli_3.4.1             later_1.3.0          
##  [37] htmltools_0.5.3       tools_4.1.1           igraph_1.3.5         
##  [40] gtable_0.3.1          glue_1.6.2            RANN_2.6.1           
##  [43] reshape2_1.4.4        Rcpp_1.0.9            scattermore_0.8      
##  [46] cellranger_1.1.0      jquerylib_0.1.4       vctrs_0.4.2          
##  [49] svglite_2.1.0         nlme_3.1-159          progressr_0.11.0     
##  [52] crosstalk_1.2.0       lmtest_0.9-40         spatstat.random_2.2-0
##  [55] xfun_0.33             stringr_1.4.1         globals_0.16.1       
##  [58] rvest_1.0.3           mime_0.12             miniUI_0.1.1.1       
##  [61] lifecycle_1.0.3       irlba_2.3.5.1         goftest_1.2-3        
##  [64] future_1.28.0         MASS_7.3-58.1         zoo_1.8-11           
##  [67] scales_1.2.1          spatstat.core_2.4-4   hms_1.1.2            
##  [70] promises_1.2.0.1      spatstat.utils_2.3-1  parallel_4.1.1       
##  [73] RColorBrewer_1.1-3    yaml_2.3.5            reticulate_1.26      
##  [76] pbapply_1.5-0         gridExtra_2.3         sass_0.4.2           
##  [79] rpart_4.1.16          stringi_1.7.8         highr_0.9            
##  [82] systemfonts_1.0.4     rlang_1.0.6           pkgconfig_2.0.3      
##  [85] matrixStats_0.62.0    evaluate_0.17         lattice_0.20-45      
##  [88] ROCR_1.0-11           purrr_0.3.5           tensor_1.5           
##  [91] labeling_0.4.2        patchwork_1.1.2       htmlwidgets_1.5.4    
##  [94] tidyselect_1.2.0      parallelly_1.32.1     RcppAnnoy_0.0.19     
##  [97] plyr_1.8.7            R6_2.5.1              generics_0.1.3       
## [100] DBI_1.1.3             mgcv_1.8-40           pillar_1.8.1         
## [103] withr_2.5.0           fitdistrplus_1.1-8    survival_3.4-0       
## [106] abind_1.4-5           future.apply_1.9.1    KernSmooth_2.23-20   
## [109] utf8_1.2.2            spatstat.geom_2.4-0   plotly_4.10.0        
## [112] tzdb_0.3.0            rmarkdown_2.17        grid_4.1.1           
## [115] data.table_1.14.2     webshot_0.5.4         digest_0.6.29        
## [118] xtable_1.8-4          httpuv_1.6.6          munsell_0.5.0        
## [121] viridisLite_0.4.1     bslib_0.4.0